import re

import pandas as pd
from joblib import Parallel, delayed

from iflytech_assistant.client import instruct_with_profile as instruct

# System prompt used for both LLM calls in run(). It asks the model to analyse
# the cause of errors in a speech-recognition result, briefly (within 50
# characters).
system_prompt = "给定你一段语音识别的结果，希望你帮我分析其中的错误原因。简短分析，不要长篇大论。50字以内"
# Follow-up user message for the second call: asks the model to output the
# corrected text based on its own analysis.
user_prompt = "请根据你的分析，输出修正后的结果。"


@delayed
def run(input_):
    """Correct one speech-recognition transcript with a two-turn LLM exchange.

    The function is wrapped with ``joblib.delayed``, so calling ``run(x)``
    is meant to produce a deferred task for ``joblib.Parallel`` rather than
    execute immediately (see ``main``).

    Args:
        input_: The raw speech-recognition text to analyse and correct.

    Returns:
        The model's corrected text after cleanup: straight and curly double
        quotes, dollar signs and backslashes are removed, followed by a
        leading "修正…" label up to the first ASCII or full-width colon.
        Assumes ``llm_response`` is a ``str`` — TODO confirm against the
        client.
    """
    # Turn 1: deterministic (temperature 0) analysis of what went wrong.
    analysis = instruct(
        input_, system_prompt=system_prompt, temperature=0
    ).llm_response

    # Turn 2: replay the first exchange as history and ask for the fix.
    conversation = [
        {"role": "user", "content": input_},
        {"role": "assistant", "content": analysis},
    ]
    corrected = instruct(
        user_prompt,
        system_prompt=system_prompt,
        temperature=0.7,
        historys=conversation,
    ).llm_response

    # Order matters: strip stray punctuation first so that a quoted label
    # such as "修正后的结果：" starts at position 0 for the anchored pattern.
    for pattern in (r'["“”$\\]', r'^修正.*?[:：]'):
        corrected = re.sub(pattern, "", corrected)
    return corrected


def main():
    """Run the correction pipeline over the input CSV and save the results.

    Reads ``.vscode/语音识别优化/sharegpt.csv``, submits every value of the
    ``语音识别`` column to ``run`` through ``joblib.Parallel``, stores the
    outputs in a new ``星辰微调结果`` column, and writes that column next to
    ``用户实际发送`` into ``.vscode/语音识别优化/xingchen.csv`` without the
    index.
    """
    frame = pd.read_csv(".vscode/语音识别优化/sharegpt.csv")

    # run is decorated with joblib.delayed, so this only builds the task
    # list; the actual work happens when the Parallel runner consumes it.
    pending = list(map(run, frame["语音识别"]))
    runner = Parallel(n_jobs=1, verbose=10)
    frame["星辰微调结果"] = runner(pending)

    # Keep only the model output and the reference column for comparison.
    frame[["星辰微调结果", "用户实际发送"]].to_csv(
        ".vscode/语音识别优化/xingchen.csv", index=False
    )


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
